# Simple linear regression of CO2EMISSIONS on CYLINDERS, using the
# FuelConsumption.csv dataset. Reconstructed from a notebook export: each
# "In [n]" marker below corresponds to one notebook cell, and the outputs
# recorded in the export are preserved as comments next to the code that
# produced them.

# In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as seabornInstance
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn import metrics
from sklearn import linear_model

# The notebook used the `%matplotlib inline` magic, which is not valid Python
# syntax in a plain script. Apply it only when running under IPython/Jupyter.
try:
    get_ipython().run_line_magic('matplotlib', 'inline')
except NameError:
    pass

# In [3]:
dataset = pd.read_csv('./FuelConsumption.csv')

# In [4]:
# Scatter of the raw data: one marker per row of the dataset.
dataset.plot(x='CYLINDERS', y='CO2EMISSIONS', style='o')
plt.title('CYLINDERS vs CO2EMISSIONS')
plt.xlabel('CYLINDERS')
plt.ylabel('CO2EMISSIONS')
plt.show()

# In [5]:
# reshape(-1, 1) turns each column into an (n_samples, 1) array, the 2-D
# layout the estimator is fitted on below.
X = dataset['CYLINDERS'].values.reshape(-1, 1)
y = dataset['CO2EMISSIONS'].values.reshape(-1, 1)

# In [6]:
# Hold out 20% of the rows for testing; random_state fixes the shuffle so the
# split (and therefore the numbers recorded below) is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=0
)

# In [7]:
regressor = LinearRegression()

# In [8]:
regressor.fit(X_train, y_train)
# Out[8]: LinearRegression()

# In [9]:
# To retrieve the intercept:
print(regressor.intercept_)
# For retrieving the slope:
print(regressor.coef_)
# Recorded output:
# [84.00737445]
# [[30.01473943]]

# In [10]:
y_pred = regressor.predict(X_test)

# In [11]:
# flatten() collapses the (n, 1) arrays to 1-D so each becomes one column.
df = pd.DataFrame({'Actual': y_test.flatten(), 'Predicted': y_pred.flatten()})
df  # bare expression: displays the frame in a notebook, no effect in a script
# Out[11]:
#      Actual   Predicted
# 0       356  324.125290
# 1       209  204.066332
# 2       230  204.066332
# 3       212  264.095811
# 4       168  204.066332
# ...     ...         ...
# 209     198  204.066332
# 210     221  204.066332
# 211     191  204.066332
# 212     184  204.066332
# 213     184  204.066332
# 214 rows × 2 columns

# In [12]:
# Bar chart comparing actual and predicted values for the first 25 test rows.
df1 = df.head(25)
df1.plot(kind='bar', figsize=(16, 10))
plt.grid(which='major', linestyle='-', linewidth='0.5', color='green')
# NOTE(review): minor grid lines are only drawn where minor ticks exist;
# consider plt.minorticks_on() if they are expected to be visible.
plt.grid(which='minor', linestyle=':', linewidth='0.5', color='black')
plt.show()

# In [13]:
# Test points in gray with the fitted regression line in red.
plt.scatter(X_test, y_test, color='gray')
plt.plot(X_test, y_pred, color='red', linewidth=2)
plt.show()

# In [14]:
print('Mean Absolute Error:', metrics.mean_absolute_error(y_test, y_pred))
print('Mean Squared Error:', metrics.mean_squared_error(y_test, y_pred))
# RMSE is the square root of the MSE computed on the same test split.
print('Root Mean Squared Error:', np.sqrt(metrics.mean_squared_error(y_test, y_pred)))
# Recorded output:
# Mean Absolute Error: 28.193216672104597
# Mean Squared Error: 1292.2744990618455
# Root Mean Squared Error: 35.948219692522265